# Import anonymised SPSS (.sav) survey file
d_raw <- haven::read_sav("data/photo_rating_survey/photo_rating_raw_anon.sav") %>%
  mutate(
    # Attention check on Q8: TRUE only when items 4 and 8 are coded 1 and none
    # of items 1, 2, 3, 5, 6, 7 is coded 1. `%in%` never returns NA, so a
    # missing answer counts as "not 1".
    passed_attention =
      Q8_4 %in% 1 & Q8_8 %in% 1 &
        !(
          Q8_1 %in% 1 | Q8_2 %in% 1 | Q8_3 %in% 1 |
            Q8_5 %in% 1 | Q8_6 %in% 1 | Q8_7 %in% 1
        )
  ) %>%
  # NOTE(review): passed_attention is only tabulated here; nothing in this
  # script filters on it -- confirm that is intended.
  count_prop(passed_attention) %>%
  rename(duration = Duration__in_seconds_) %>%
  # Short aliases for the respondent identifier and the gender item (Q5)
  mutate(
    id = ResponseId,
    gender = Q5
  )

# Import the photo ID lookup table (survey photo slot -> photo ID)
photo_ids <- read_excel("data/photo_rating_survey/photo_ids.xlsx") %>%
  mutate(
    # Prefix with "A" so the key matches the "A<number>" photo prefix that is
    # extracted from the survey column names when reshaping to long format
    id_qualtrics = str_glue("A{id_qualtrics}"),
    # First character of the photo ID
    # (presumably a gender code such as M / W / T -- TODO confirm)
    photo_gender = str_sub(id_scto, 1, 1)
  ) %>%
  rename(photo_id = id_scto)

# Reshape to one row per respondent x photo: columns named "A<n>_Q<item>" are
# split into a `photo` key ("A<n>") and one column per item ("Q<item>").
d_long <- d_raw %>%
  pivot_longer(
    cols = matches("A\\d{1,2}_Q.*"),
    names_to = c("photo", ".value"),
    names_pattern = "(A\\d{1,2})_(Q.*)"
  ) %>%
  # Drop respondent x photo rows with no answer to Q48
  filter(!is.na(Q48)) %>%
  relocate(id, photo) %>%
  # Attach photo_id / photo_gender; tidylog reports the join match counts
  tidylog::left_join(photo_ids, by = c("photo" = "id_qualtrics"))

# Quick descriptive checks on the long data
d_long %>% count_prop(Q50)

# Number of ratings per photo, smallest first
d_long %>%
  count(photo_id) %>%
  arrange(n) %>%
  print_all()

# Respondent gender split among those coded 1 or 2
d_long %>%
  filter(gender %in% 1:2) %>%
  count_prop(gender)

# Rating outcomes, their z-scored counterparts, and display labels.
# The three vectors are parallel: position i in each refers to the same outcome.
vars <- c(
  "rich",
  "religion_hindu",
  "age",
  "lower_caste",
  "educated",
  "neatly_dressed",
  "comfortable_talking",
  "unsafe_in_home",
  "worried_talk_family",
  "spouse_unhappy_talk"
)
vars_z <- paste0(vars, "_z")
var_labels <- c(
  "Rich",
  "Hindu",
  "Age",
  "Scheduled\nCaste/Tribe",
  "Educated",
  "Neatly\ndressed",
  "Comfortable\ntalking",
  "Unsafe\nin home",
  "Worried\ntalk w.\nfamily",
  "Spouse\nunhappy"
)

# Build the analysis variables listed in `vars` from the raw survey items, then
# add a "<var>_z" column for each via z_calc_std().
# NOTE(review): most items are flipped with `5 - Q` (age with `6 - Q`),
# presumably so that higher values mean "more" -- confirm against the codebook.
d_long_clean <- d_long %>%
  mutate(
    # Q48
    rich = 5 - Q48,

    # Q49: the free-text "other" answers are matched first, so they take
    # priority over the coded answer; rows matching no branch stay NA
    religion = case_when(
      str_detect(Q49_5_TEXT, "Christian|Cheistian|Cristian|கிறிஸ்தவ மதத்தை") ~ "Christian",
      str_detect(Q49_5_TEXT, "No idea|Not sure|தெரியவில்லை|No clue|Any relgion|Don't know|cant say|you cannot judge") ~ "Don't know",
      str_detect(Q49_5_TEXT, "எந்த மதத்தை பின்பற்றினாலும் மற்றவர்களுக்கு அது தேவையற்றது.|All") ~ "Refuse",
      str_detect(Q49_5_TEXT, "Human|மனிதம்|சைவம்") ~ "Other",
      Q49 %in% 1:4 ~ as_factor(Q49),
      str_length(Q49_5_TEXT) == 0 ~ "Refuse"
    )
  ) %>%
  count_prop(Q49, Q49_5_TEXT, religion) %>%
  # Indicator for religion == "Hindu" (NA where religion is NA)
  mutate(religion_hindu = as.numeric(religion == "Hindu")) %>%

  # Q50
  count_prop(Q50) %>%
  mutate(lower_caste = 5 - Q50) %>%

  # Q61 (kept in its original direction)
  count_prop(Q61) %>%
  mutate(unsafe_in_home = Q61) %>%

  # Q51
  count_prop(Q51) %>%
  mutate(age = 6 - Q51) %>%

  # Q52 - Q56
  count_prop(Q52) %>%
  mutate(
    worried_talk_family = 5 - Q52,
    spouse_unhappy_talk = 5 - Q53,
    neatly_dressed = 5 - Q54,
    educated = 5 - Q55,
    comfortable_talking = 5 - Q56
  ) %>%

  # Standardised versions: adds "<var>_z" for every entry of `vars`
  mutate(across(all_of(vars), list(z = z_calc_std)))

# GRAPH - averages ---------------------------------------------------------------------------------------------------

# One row per rating x standardised outcome (`name` = outcome, `value` = z-score)
d_extra_long <- d_long_clean %>%
  select(photo_id, photo_gender, all_of(vars_z)) %>%
  # Copy of photo_gender used for the fill aesthetic
  mutate(photo_gender_col = photo_gender) %>%
  pivot_longer(cols = all_of(vars_z))

# Outcome by photo gender, one facet per outcome
d_extra_long %>%
  bar_chart(
    x = photo_gender,
    y = value,
    facet = name,
    fill = photo_gender_col
  )

# Saves the most recent plot (no `plot` argument is given)
ggplot2::ggsave(
  "outputs/figs/photo_rating_survey/photo_rating_survey_means.png",
  width = 10,
  height = 8
)


# Number of ratings per photo (sample size) ----------------------------------------------------------------------------------------------------------------

# Ratings per photo, smallest first
d_long_clean %>%
  count(photo_id) %>%
  arrange(n) %>%
  print_all()


# Caste stat ----------------------------------------------------------------------------------------------------------------

# Share of ratings with Q50 coded 1, by photo gender
d_long_clean %>%
  count_prop(Q50) %>%
  group_by(photo_gender) %>%
  summarise(very_likely_lower_caste = mean_na(Q50 %in% 1))


# GRAPH - photo specific ----------------------------------------------------------------------------------------------------------------

# Example: standardised "rich" rating for each photo
d_extra_long %>%
  filter(name == "rich_z") %>%
  bar_chart(
    x = photo_id,
    fill = photo_gender,
    y = value
  )

# NOTE(review): top-level `var` looks like a leftover for stepping through
# graph_by_photo() interactively; it also masks stats::var in the global env.
var <- "rich_z"


# Plot one outcome by photo as a point-range chart.
#
# df:  long data with columns photo_id, photo_gender, name and value
#      (as built in d_extra_long).
# var: the value of `name` to plot, e.g. "rich_z"; also used as the y-axis label.
#
# Returns a ggplot object with photos ordered along the x-axis by `y`.
# NOTE(review): assumes mean_ci() returns columns y, ymin and ymax, since those
# names are used below -- confirm against the helper.
graph_by_photo <- function(df, var) {
  per_photo <- df %>%
    filter(name == var) %>%
    group_by(photo_id, photo_gender) %>%
    summarise(
      mean_ci(value),
      n = n()
    ) %>%
    ungroup()

  # Order photos by `y` so the x-axis is sorted
  per_photo <- per_photo %>%
    mutate(photo_id = fct_reorder(factor(photo_id), y))

  ggplot(
    per_photo,
    aes(x = photo_id, y = y, ymin = ymin, ymax = ymax, colour = photo_gender)
  ) +
    geom_pointrange() +
    labs(y = var)
}

# Save one per-photo point-range chart for each standardised outcome.
# The plot object is passed to ggsave() explicitly: inside walk() the plot is
# never printed, so relying on the implicit "last plot" global is fragile.
vars_z %>%
  walk(
    ~ {
      p <- graph_by_photo(d_extra_long, .x)
      ggplot2::ggsave(
        paste0("outputs/figs/photo_rating_survey/", .x, ".png"),
        plot = p,
        width = 6,
        height = 4
      )
    }
  )




# Are there treatment effects on the average [AGE/NEATNESS] conditional on gender ----------------------------------------------------------------------------------------------------------------

# [Photo property of selected] = Trans + Female + Treat + e
# Question - do I want to control for photo property of the OTHER?

# 1. Get dataset by photo (with averages)
# Photo-level dataset: mean of every raw and z-scored rating per photo
photo_ratings_means <- d_long_clean %>%
  select(photo_id, photo_gender, all_of(vars_z), all_of(vars)) %>%
  group_by(photo_id, photo_gender) %>%
  summarise(
    across(c(all_of(vars_z), all_of(vars)), mean_na)
  ) %>%
  ungroup() %>%
  # Restandardise the z-scores, this time across photos
  mutate(across(all_of(vars_z), z_calc_std))


# Attach the ratings of the photo that was selected to each choice row.
# The ".jpg" extension is stripped so the key matches photo_id.
r2_choices_with_photo_ratings <- r2_choices_num %>%
  count_prop(hiring_selected_photo) %>%
  mutate(
    hiring_selected_photo_id = str_replace(hiring_selected_photo, "\\.jpg", "")
  ) %>%
  left_join(
    photo_ratings_means,
    by = c("hiring_selected_photo_id" = "photo_id")
  )

# One regression per standardised photo rating: the rating of the selected
# photo on photo gender, discussion treatment and video type. The sample is
# restricted to the "control" and "discussion_full" arms.
models <- vars_z %>%
  map(
    ~ feols_custom(
      as.formula(paste0(.x, " ~ photo_gender + discussion_full + video_type")),
      data = r2_choices_with_photo_ratings %>% filter(discuss_type %in% c("control", "discussion_full")),
      cluster = "group_id",
      fixef = c("stratum_id")
    )
  ) %>%
  set_names(vars_z)

# Export with display labels as column names. The spanning header is repeated
# once per model, so it stays in sync if `vars` gains or loses an outcome.
tex_export(
  models %>% set_names(var_labels),
  file = "outputs/tables/photo_rating_survey.tex",
  coef_rename = coef_label,
  gof_map = fe_label_no_fe,
  dep_means = list(
    "Mean: No discussion (private) + photo is male" = "discuss_type == 'control' & photo_gender == 'M'",
    "Mean: No discussion (private) + photo is female" = "discuss_type == 'control' & photo_gender == 'W'",
    "Mean: No discussion (private) + photo is trans" = "discuss_type == 'control' & photo_gender == 'T'"
  ),
  stat_vec = "long",
  additional_header = vec_to_custom_header(
    c(
      " ",
      rep("Dep var: Characteristic (Z) of photo that was chosen in outcome round", length(models))
    )
  )
)

# ALL RESULTS GO IN THE OPPOSITE DIRECTION
# e.g. discussion makes people more likely to select rich people, but trans people are perceived as being less rich
# ONLY one going in other direction is Hindu
# WITH CLUSTERING - all insignificant

# How large is anti-caste discrimination (as benchmark for ex.) ----------------------------------------------------------------------------------------------------------------

# Create dataset with the DIFFERENCE in caste perceptions

# Distribution of photo-level caste perceptions (standardised, then raw)
photo_ratings_means %>%
  hist_basic(lower_caste_z)

photo_ratings_means %>%
  hist_basic(lower_caste) # varies between 2 and 3; not much variation (not strong signal)


# For each standardised rating, regress the private-round choice on that rating
# plus controls, within the control arm only, and print all models together.
# NOTE(review): stratum_id enters both as factor(stratum_id) in the formula and
# through `fixef`; and `coef_keep` names pair_includes_trans / discussion_full
# terms that do not appear in this formula (it uses pair_includes_trans_alt).
# These look carried over from another specification -- confirm.
vars_z %>%
  map(
    function(rating_var) {
      feols_custom(
        as.formula(
          paste0(
            "r2_choose_comparator ~ ",
            rating_var,
            " + pair_includes_trans_alt + video_type_placebo + video_type_treatment + factor(stratum_id) + delivery_incentive_exp + phase + item_diff + r2_reliability_diff * r2_reliability_shown"
          )
        ),
        data = r2_choices_with_photo_ratings %>% filter(discuss_type == "control"),
        cluster = "group_id",
        fixef = c("stratum_id"),
        coef_keep = "^pair_includes_trans$|(^pair_includes_trans_alt\\:discussion_full$)|^discussion_full$|item_diff|r2_reliability_diff|r2_reliability_shown",
        coef_omit = "group_control|stratum_id|delivery_incentive_exp"
      )
    }
  ) %>%
  tex_export()

# Which of these predict choices?
# - Not the demographic characteristics (signals are not strong enough - no variation)
# - Comfortable talking yes
# - Unsafe in home yes
# - Worried talk with family yes


# Control for photo characs in standard regression ----------------------------------------------------------------------------------------------------------------
# r2_choices_num$age_z
# Dataset with photo characteristic diff
# Attach the photo-level ratings of both photos in each pair.
# Columns starting with "age" are dropped from the choices data first so they do
# not collide with the rating columns. The first join adds the ratings of photo
# 1 unsuffixed; the second join then collides on every rating column, so
# `suffix` labels photo 1's ratings "_1" and photo 2's ratings "_2".
r2_choices_w_rating_diffs <- r2_choices_num %>%
  select(-matches("^age")) %>%
  mutate(
    r2_photo_1 = str_replace(r2_photo_1, "\\.jpg", ""),
    r2_photo_2 = str_replace(r2_photo_2, "\\.jpg", "")
  ) %>%
  left_join(
    photo_ratings_means,
    by = c("r2_photo_1" = "photo_id")
  ) %>%
  tidylog::left_join(
    photo_ratings_means,
    by = c("r2_photo_2" = "photo_id"),
    suffix = c("_1", "_2")
  )

# Names of the difference columns, parallel to vars_z
vars_diff <- paste0(vars_z, "_diff")

# For each standardised rating: photo 2 minus photo 1
for (i in seq_along(vars_z)) {
  r2_choices_w_rating_diffs[[vars_diff[i]]] <-
    r2_choices_w_rating_diffs[[paste0(vars_z[i], "_2")]] -
    r2_choices_w_rating_diffs[[paste0(vars_z[i], "_1")]]
}


# Standard choice regression with the photo-rating differences as controls.
#   Model 1: rating differences enter additively.
#   Model 2: rating differences are also interacted with discussion_full.
# Both use the "control" and "discussion_full" arms only and share the same
# display name, so the exported table shows the same header for both columns.
# NOTE(review): `coef_keep` refers to pair_includes_trans_alt:discussion_full,
# while the formulas interact pair_includes_trans -- confirm which is intended.
list(
  "Chose worker in private\noutcome round (=1)" = feols_custom(
    as.formula(
      paste0(
        "r2_choose_comparator ~ pair_includes_trans * (discussion_full) + item_diff + r2_reliability_diff * r2_reliability_shown + ",
        fml_sum(vars_diff)
      )
    ),
    data = r2_choices_w_rating_diffs %>% filter(discuss_type %in% c("control", "discussion_full")),
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "round"),
    cluster = "group_id",
    ri = FALSE,
    coef_keep = "^pair_includes_trans$|(^pair_includes_trans_alt\\:discussion_full$)|^discussion_full$|item_diff|r2_reliability_diff|r2_reliability_shown",
    coef_omit = "group_control|stratum_id|delivery_incentive_exp"
  ),
  "Chose worker in private\noutcome round (=1)" = feols_custom(
    as.formula(
      paste0(
        "r2_choose_comparator ~ pair_includes_trans * (discussion_full) + (",
        fml_sum(vars_diff),
        ") * discussion_full + item_diff + r2_reliability_diff * r2_reliability_shown"
      )
    ),
    data = r2_choices_w_rating_diffs %>% filter(discuss_type %in% c("control", "discussion_full")),
    fixef = c("stratum_id", "delivery_incentive_exp", "comparator_order_in_pair", "phase", "round"),
    cluster = "group_id",
    ri = FALSE,
    coef_keep = "^pair_includes_trans$|(^pair_includes_trans_alt\\:discussion_full$)|^discussion_full$|item_diff|r2_reliability_diff|r2_reliability_shown",
    coef_omit = "group_control|stratum_id|delivery_incentive_exp"
  )
) %>%
  tex_export(
    file = "outputs/tables/photo_rating_survey_diffs.tex",
    coef_rename = coef_label,
    gof_map = fe_label_no_fe,
    coef_reorder = c("pair_includes_trans:discussion_full"),
    # Hide fixed effects and controls; "age" is excluded from the omit list
    coef_omit = vars_to_regex(
      c(
        "stratum_id", "Intercept", "group_control", "pair_includes_trans_alt$",
        "phase", "reliability", "item",
        control_vars[!(control_vars == "age")],
        "delivery"
      )
    ),
    stat_vec = "({std.error}) [{p.value}]"
  )

